library(data.table)
library(ggplot2)
library(plotly)
income_data <- fread(paste0(getwd(),"/basic_income_dataset_dalia.csv"))
# DISTRIBUSION BY AGE + GENDER
ggplot(data = income_data, aes(x = age)) +
geom_histogram(aes(y = ..density..)
,col = 'black'
,fill = 'white') +
geom_density(alpha = 0.2, fill='#FF6666') +
facet_grid(gender ~.)

# DISTRIBUSION BY AGE + RURAL
ggplot(data = income_data, aes(x = age)) +
geom_histogram(aes(y = ..density..)
,col = 'black'
,fill = 'white') +
geom_density(alpha = 0.2, fill='#FF6666') +
facet_grid(rural ~.)

# DISTRIBUSION BY AGE + EDUCATION
income_data[dem_education_level == 'no', dem_education_level:="no"]
income_data[dem_education_level == 'low', dem_education_level:="3. low"]
income_data[dem_education_level == 'medium', dem_education_level:="2. medium"]
income_data[dem_education_level == 'high', dem_education_level:="1. high"]
ggplotly(
ggplot(data = income_data, aes(x = age, fill = dem_education_level)) +
geom_histogram(bins = 50
,col = 'black'
,alpha = 0.7)
)
ggplotly(
ggplot(data = income_data, aes(x = age, fill = dem_education_level)) +
geom_histogram(bins = 50
,position = "fill"
,col = 'black'
,alpha = 0.5)
)
prop.table(table(income_data$dem_education_level, income_data$age_group),margin = 2)
##
## 14_25 26_39 40_65
## 1. high 0.24855156 0.45878811 0.35237888
## 2. medium 0.41309386 0.35829883 0.41559852
## 3. low 0.28679027 0.15205119 0.19900065
## no 0.05156431 0.03086187 0.03302194
prob_t <- data.table(prop.table(table(income_data$dem_education_level, income_data$age_group),margin = 2))
names(prob_t) <- c('dem_education_level', 'age_group', 'probability')
ggplotly(
ggplot(data = prob_t, aes(x = age_group, y = probability, fill = dem_education_level)) +
geom_bar(stat = 'identity'
,alpha=0.7
,col = 'black')
)